#coding=utf-8
#!/usr/bin/env python
from __future__ import unicode_literals
__author__ = 'peng'

import os,sys
sys.path.append('..')
from whoosh_helper import pdf_whoosh

# Re-import ``sys`` and then set the process-wide default encoding to UTF-8.
# The reload has to come first: the order of these two statements matters.
# NOTE(review): the bare ``reload`` builtin and ``sys.setdefaultencoding``
# suggest this script targets Python 2 -- confirm before running it elsewhere.
reload(sys)
sys.setdefaultencoding('utf-8')

def walk1(rootdir):
    """Recursively index every not-yet-indexed PDF found below ``rootdir``.

    Every file under ``rootdir`` is echoed to stdout together with its
    ``os.path.splitext`` result. A file is passed to ``pdf_whoosh.save`` when
    its extension is ``.pdf`` (compared case-insensitively, so ``.PDF`` also
    matches) and ``pdf_whoosh.md5_exists`` returns a falsy value for it.

    :param rootdir: directory to scan; all of its subdirectories are visited.
    :return: None
    """
    for root, _dirs, files in os.walk(rootdir):
        for name in files:
            filename = os.path.join(root, name)
            # Split once; the result feeds both the trace line and the
            # extension test below.
            parts = os.path.splitext(filename)
            # A single formatted argument prints the same text whether
            # ``print`` is a statement or a function.
            print('%s %s' % (filename, parts))
            if parts[1].lower() != '.pdf':
                continue
            # presumably a truthy result means this file's content is already
            # indexed -- confirm against pdf_whoosh
            if pdf_whoosh.md5_exists(filename):
                continue
            pdf_whoosh.save(filename)
            print('%s to text successed' % filename)

from mongoer import dbobjects
# Query ``dbobjects.settings`` for the entry whose ``name`` is ``watch_dir``;
# its ``value`` field is the directory handed to ``walk1``.
_watch_setting = dbobjects.settings.find_one({'name': 'watch_dir'})
watch_path = _watch_setting['value']

# Runs at module level: the scan starts as soon as this file is executed.
walk1(rootdir=watch_path)